Author

Diego Cruz Aguilar

Published

November 8, 2024

Modified

April 11, 2026

1 Librerías

Code
import time
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
from itables import show
from utils.mimic_load import load_csv, segment_signal_by_label

2 Variables globales

Code
# Seed taken from the wall clock, so it differs on every run; replace it with
# a fixed integer when results need to be reproducible.
RANDOM_SEED = int(time.time())
# User number passed to load_csv when the dataset is read.
USER_SELECT = 1
# Sampling rate in Hz. The Time column advances in 0.008 s steps, i.e. 125 Hz
# (the previous value, 1250, was ten times too high).
FS = 125
# Samples per segment: 30 s of signal at FS (3750 samples).
SEGMENT_SIZE = 30 * FS
FOLDER_PATH = "./dataset/mimic_perform_af_csv"

3 Carga de datos

Code
# Read the dataset through the project loader, using the configured folder
# and user number.
final_df = load_csv(FOLDER_PATH, USER_SELECT)

# Keep only the rows whose label is 0.
# NOTE(review): presumably label 0 marks the selected user - confirm in load_csv.
is_label_zero = final_df["label"] == 0
filtered_df = final_df[is_label_zero]

4 Conocer los datos

4.1 Ver los primeros datos y sus cabeceras

Code
# Preview the first five rows together with the column headers.
final_df.head()
Time PPG ECG resp numb_user label
0 0.000 0.537634 0.425781 -0.029340 1 0
1 0.008 0.534702 0.404297 -0.036675 1 0
2 0.016 0.531769 0.400391 -0.044010 1 0
3 0.024 0.528837 0.400391 -0.053790 1 0
4 0.032 0.524927 0.419922 -0.061125 1 0

4.2 Información de los datos

Code
# Print the index range, the dtype of every column and the memory footprint.
final_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2100014 entries, 0 to 2100013
Data columns (total 6 columns):
 #   Column     Dtype  
---  ------     -----  
 0   Time       float64
 1   PPG        float64
 2   ECG        float64
 3   resp       float64
 4   numb_user  int64  
 5   label      int64  
dtypes: float64(4), int64(2)
memory usage: 96.1 MB

4.3 Descripción de los datos

Code
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
final_df.describe()
Time PPG ECG resp numb_user label
count 2.100014e+06 2.100014e+06 2.100014e+06 2.100014e+06 2.100014e+06 2.100014e+06
mean 6.000000e+02 1.193070e+00 3.920216e-01 2.226942e-01 1.057143e+01 9.285714e-01
std 3.464126e+02 7.767317e-01 2.349270e-01 4.319208e-01 5.827452e+00 2.575394e-01
min 0.000000e+00 0.000000e+00 -5.019608e-01 -1.846506e+00 1.000000e+00 0.000000e+00
25% 3.000000e+02 4.633431e-01 2.346041e-01 -4.156479e-02 6.000000e+00 1.000000e+00
50% 6.000000e+02 8.064516e-01 3.847656e-01 2.200000e-01 1.150000e+01 1.000000e+00
75% 9.000000e+02 1.893451e+00 5.195312e-01 4.643077e-01 1.600000e+01 1.000000e+00
max 1.200000e+03 4.001955e+00 1.503922e+00 2.844215e+00 1.900000e+01 1.000000e+00

4.4 Tamaño de los datos

Code
# (number of rows, number of columns) of the loaded frame.
final_df.shape
(2100014, 6)

4.5 Visualizar su distribución

Code
# Signals whose value distributions are compared.
columns_to_plot = ["PPG", "resp"]
plot_df = filtered_df.loc[:, columns_to_plot]

# Reshape to long format: one row per (variable, value) pair, so a single
# histogram call can split the data by variable.
long_df = plot_df.melt(var_name="Variable", value_name="Valor")

# One histogram per variable, stacked vertically (facet_row) and coloured by
# variable.
fig = px.histogram(
    long_df,
    x="Valor",
    color="Variable",
    facet_row="Variable",
    nbins=50,
    title="Distribuciones de las Variables Seleccionadas",
)
fig.show()

4.6 Detección de valores atípicos

Code
# One box per signal so outliers in PPG and respiration can be compared
# side by side.
fig = go.Figure(
    data=[
        go.Box(y=filtered_df["PPG"], name="PPG"),
        go.Box(y=filtered_df["resp"], name="Resp"),
    ]
)

fig.update_layout(
    template="plotly_white",
    title="Boxplots para Identificación de Valores Atípicos",
    xaxis_title="Variables",
    yaxis_title="Valores",
)

fig.show()

4.7 Análisis de correlación

Code
# Plot PPG against respiration sample by sample to inspect how the two
# signals relate.
fig = px.scatter(
    filtered_df,
    x="PPG",
    y="resp",
    title="Scatter Plot: PPG vs resp",
)
fig.show()

4.8 Reconstruir la señal (raw)

Code
# Two stacked panels sharing the time axis: PPG on top, respiration below.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=("PPG Signal", "Respiration Signal"),
)

# (column in filtered_df, label used for the trace name and the y-axis title)
panels = (("PPG", "PPG"), ("resp", "Resp"))
for row, (column, label) in enumerate(panels, start=1):
    fig.add_trace(
        go.Scatter(x=filtered_df["Time"], y=filtered_df[column], mode="lines", name=label),
        row=row,
        col=1,
    )
    fig.update_yaxes(title_text=label, row=row, col=1)

# Only the bottom panel carries the x-axis title because the axis is shared.
fig.update_xaxes(title_text="Time (s)", row=2, col=1)
fig.update_layout(height=600, width=800, title_text="Señales PPG y Respiración", showlegend=False)

fig.show()
Figure 1: Reconstrucción de la señal de uno de los usuarios

4.9 Análisis de frecuencias

Code
def _band_spectrum(signal, sample_spacing, f_low, f_high):
    """Return the amplitude spectrum of a real signal inside an open band.

    Args:
        signal: 1-D real-valued samples.
        sample_spacing: Time between consecutive samples, in seconds.
        f_low: Lower band edge in Hz (exclusive).
        f_high: Upper band edge in Hz (exclusive).

    Returns:
        A ``(frequencies, amplitudes)`` pair of arrays restricted to the bins
        with ``f_low < frequency < f_high``.
    """
    # rfft computes only the non-negative half of the spectrum, which is all a
    # real-valued signal needs; it replaces a full FFT sliced in half by hand.
    frequencies = np.fft.rfftfreq(len(signal), d=sample_spacing)
    amplitudes = np.abs(np.fft.rfft(signal))
    in_band = (frequencies > f_low) & (frequencies < f_high)
    return frequencies[in_band], amplitudes[in_band]


# Sampling rate
fs = 125  # Hz
d = 1 / fs  # Sampling interval (seconds)

# Frequency bands of interest, in Hz, also used as the x-axis ranges below.
PPG_BAND = (0.5, 3)
RESP_BAND = (0.1, 0.5)

# Pull the raw signals out of the dataset.
ppg_signal = filtered_df['PPG'].values
resp_signal = filtered_df['resp'].values

# Band-limited amplitude spectra of both signals.
ppg_band_freqs, ppg_band_amplitudes = _band_spectrum(ppg_signal, d, *PPG_BAND)
resp_band_freqs, resp_band_amplitudes = _band_spectrum(resp_signal, d, *RESP_BAND)

# The x-axes are not shared because the two bands cover different ranges.
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=False,
    vertical_spacing=0.1,
    subplot_titles=("Frecuencia de PPG", "Frecuencia de Respiración")
)

fig.add_trace(go.Scatter(
    x=ppg_band_freqs,
    y=ppg_band_amplitudes,
    mode='lines',
    name='PPG'
), row=1, col=1)

fig.add_trace(go.Scatter(
    x=resp_band_freqs,
    y=resp_band_amplitudes,
    mode='lines',
    name='Respiración'
), row=2, col=1)

fig.update_layout(
    title="Análisis de Frecuencia: PPG y Respiración",
    height=800,
    template="plotly_white"
)

fig.update_xaxes(title_text="Frecuencia (Hz)", range=list(PPG_BAND), row=1, col=1)
fig.update_xaxes(title_text="Frecuencia (Hz)", range=list(RESP_BAND), row=2, col=1)
fig.update_yaxes(title_text="Amplitud", row=1, col=1)
fig.update_yaxes(title_text="Amplitud", row=2, col=1)

fig.show()

5 Experimentos

5.1 Segmentar los datos

Code
# Split the full dataset into fixed-size windows of SEGMENT_SIZE samples.
# The helper returns four parallel collections: PPG segments, respiration
# segments, segment labels and user ids.
segmented = segment_signal_by_label(final_df, SEGMENT_SIZE)
segments_ppg, segments_resp, segment_labels, user_ids = segmented

# Display the PPG / respiration pair located halfway through the list as a
# quick sanity check.
middle_segment = len(segments_ppg) // 2
(segments_ppg[middle_segment], segments_resp[middle_segment])
(array([2.15249267, 2.22091887, 2.285435  , ..., 1.94330401, 2.08797654,
        2.22385142]),
 array([-0.02626756, -0.02565669, -0.02504582, ..., -0.06108735,
        -0.06047648, -0.05986561]))

5.2 Obtener la fingerprint de la unión de las señales (PPG y respiración)

Code
from utils.extact_features import compute_features_parallel

# Extract the combined PPG + respiration features for the segments.
# NOTE(review): presumably one feature vector per segment, with FS used as the
# sampling rate in Hz - confirm against compute_features_parallel.
combined_features = compute_features_parallel(segments_ppg, segments_resp, FS)

# Show the first feature vector and the number of vectors produced.
(combined_features[0], len(combined_features))
(array([ 1.36472759e+00,  2.70545818e-01, -9.22594233e-01,  1.25886370e-01,
         1.98144061e+00,  8.24251787e-01,  9.17510633e+02,  6.14145276e+00,
        -1.80968071e-04,  8.35478571e-03,  9.32966678e-01,  6.03824831e-01,
         2.91382149e-02, -2.04860227e-02,  3.31018818e-02,  5.75799279e+00,
        -7.45894319e-06,  1.42086863e-03,  1.42183565e+01,  6.53088052e-01,
         1.40151520e-02, -8.57327975e-03,  1.90182574e-03,  6.43393434e+00,
        -6.37711225e-06,  3.73592271e-04,  1.74541833e+00, -2.17755297e-01,
         1.66478462e-03, -2.55753171e-03,  2.62191060e-04,  7.24331034e+00,
         6.19581879e-02,  7.24997586e-01, -4.86277977e-01,  9.34663499e-01,
         1.90172755e+00, -9.71714161e-01,  2.54670412e+02,  5.96627214e+00,
        -9.52988670e-05,  4.39476325e-03,  1.85439603e+01,  1.20304746e-01,
         3.27447597e-02, -3.27447597e-02,  9.29437547e-03,  5.61846673e+00,
        -8.06641631e-05,  1.23739345e-03,  5.59740223e-01, -4.65897730e-02,
         5.74069967e-03, -5.74069967e-03,  1.45769151e-03,  6.54296586e+00,
         8.63515085e-05,  5.93021050e-04, -5.57631423e-01,  1.88280548e-02,
         1.72886744e-03, -1.72886744e-03,  6.75883693e-04,  7.10000314e+00,
         4.96853702e-01,  2.83236494e-01, -1.12771811e+00, -3.25932656e-04,
         8.32995690e-01,  4.51104214e-01,  6.14738564e-01,  9.89010989e+02,
         3.40136054e+02]),
 560)

5.3 Seleccionar clasificadores

Code
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Candidate models keyed by the display name used in the result tables.
# Every estimator that accepts random_state gets the same seed, so a run can
# be repeated by fixing RANDOM_SEED. The decision tree was previously left
# unseeded. KNN and Gaussian Naive Bayes take no random_state.
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=RANDOM_SEED),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(random_state=RANDOM_SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_SEED),
    "Naive Bayes Gaussiano": GaussianNB(),
}

5.4 Probar clasificadores


Probando clasificador: Random Forest

Probando clasificador: Logistic Regression

Probando clasificador: KNN

Probando clasificador: SVM

Probando clasificador: Decision Tree

Probando clasificador: Naive Bayes Gaussiano

6 Resultados

6.1 Comparativa de los resultados (distintas formas de combinar y extraer las características de las señales)

Code
# Load the previously saved ranking of feature-extraction / model pairs.
df_ranking = pd.read_csv("./ranking_features.csv")

# Best cross-validation accuracy first, rendered as an interactive table.
df_best = df_ranking.sort_values("Cross-Validation Accuracy", ascending=False)
show(df_best)
Feature Extraction Model Cross-Validation Accuracy Test Accuracy
Loading ITables v2.2.3 from the internet... (need help?)

6.2 Indicadores de los resultados

Code
# Tabulate the metrics collected while testing the classifiers and save them
# to disk without the index column.
df_results = pd.DataFrame(results)
df_results.to_csv("One-to-Many-Results.csv", index=False)

# Interactive table, highest accuracy first.
df_ordenado = df_results.sort_values("accuracy", ascending=False)
show(df_ordenado)
accuracy precision f1 fpr_weighted fnr_weighted recall_weighted grr model
Loading ITables v2.2.3 from the internet... (need help?)

6.3 Gráfico de líneas de exactitud (accuracy) de los clasificadores

Code
# Line chart with one trace per classifier.
fig = go.Figure()
unique_models = df_results["model"].unique()

for model_name in unique_models:
    # Rows of the results table that belong to this classifier.
    model_rows = df_results.loc[df_results["model"] == model_name]

    accuracy_trace = go.Scatter(
        x=model_rows.index + 1,  # row index shifted by one, used as the test number
        y=model_rows["accuracy"],
        mode="lines+markers",
        name=model_name,
    )
    fig.add_trace(accuracy_trace)

# Titles and theme.
fig.update_layout(
    template="plotly_white",
    title="Comparación de Accuracy entre Clasificadores",
    xaxis_title="Número de Prueba",
    yaxis_title="Accuracy",
)

fig.show()

6.4 Comparación con el estado del arte

Code
# Row of the results table with the highest accuracy.
mejor_modelo = df_results.loc[df_results["accuracy"].idxmax()]

# Accuracies from related work, plus the best model of this notebook.
models_accuracies = {
    "Pu et al": 0.9894,
    "Zhao et al static": 0.96,
    "Zhao et al movement": 0.9073,
    "Aly et al": 0.935,
    "Wu et al": 0.921,
    "Zhang": 0.949,
    f"Proposed ({mejor_modelo['model']})": mejor_modelo["accuracy"],
}

# Parallel lists for the plot; dict order is preserved.
models = list(models_accuracies)
accuracies = list(models_accuracies.values())

# Keep the 0.9 floor when every value fits. Otherwise lower it slightly below
# the smallest accuracy so no point (notably the proposed model) is cut off.
lowest_accuracy = float(min(accuracies))
y_axis_min = 0.9 if lowest_accuracy >= 0.9 else lowest_accuracy - 0.01

# Markers are coloured by accuracy and joined by a solid line.
fig = go.Figure(data=[
    go.Scatter(
        x=models,
        y=accuracies,
        mode='markers+lines',
        marker=dict(size=10, color=accuracies, colorscale='Viridis', showscale=True),
        line=dict(dash='solid'),
        name='Accuracy'
    )
])

fig.update_layout(
    title="Model Accuracies",
    xaxis_title="Models",
    yaxis_title="Accuracy",
    yaxis_range=[y_axis_min, 1.0],
    template="plotly_white"
)

fig.show()